Data preparation

main_alleles <- c("WT", "KRAS_G12A", "KRAS_G12C", "KRAS_G12D", "KRAS_G12V", "KRAS_G13D")

dusp_rna_data <- readRDS("~/Downloads/coad-dusp-rna_expression.rds") %.% {
  filter(!is_hypermutant)
  select(-is_hypermutant)
  mutate(
    allele = ifelse(ras_allele %in% !!main_alleles, ras_allele, "Other"),
    allele = str_remove(allele, "KRAS_"),
    allele = factor_alleles(allele)
  )
}

# Put DUSP genes in order.
dusp_order <- unique(dusp_rna_data$hugo_symbol)
dusp_num <- as.numeric(str_remove_all(dusp_order, "[:alpha:]"))
dusp_order <- dusp_order[order(dusp_num)]
dusp_rna_data$hugo_symbol <- factor(dusp_rna_data$hugo_symbol, levels = dusp_order)
dusp_rna_data %>%
  filter(is.na(rna_expr)) %>%
  count(hugo_symbol, sort = TRUE)
hugo_symbol n
DUSP13 149
DUSP21 149
dusp_rna_data %>%
  filter(rna_expr <= 0) %>%
  count(hugo_symbol)
hugo_symbol n
DUSP5P 73
DUSP9 34
DUSP13 217
DUSP15 2
DUSP21 284
DUSP26 58
DUSP27 11

Set negative values to 0.

dusp_rna_data %<>% mutate(rna_expr = map_dbl(rna_expr, ~ max(0, .x)))

Inspect the distribution of RNA expression values

dusp_rna_data %>%
  filter(!is.na(rna_expr)) %>%
  mutate(rna_expr = rna_expr + 1) %>%
  ggplot(aes(x = allele, y = rna_expr)) +
  facet_wrap(~hugo_symbol, scales = "free_y") +
  geom_boxplot(outlier.shape = NA) +
  scale_y_continuous(trans = "log10") +
  theme(
    panel.grid.major.x = element_blank(),
    axis.text = element_text(size = 8),
    axis.text.x = element_text(angle = 45, hjust = 1)
  ) +
  labs(x = NULL, y = "RNA expression (log10 + 1)")

Remove DUSP13 and DUSP21 because they are missing data and have very low expression levels.

IGNORE_DUSPS <- c("DUSP13", "DUSP21")
dusp_rna_data %<>% filter(!hugo_symbol %in% IGNORE_DUSPS)
plot_dusp_distribtions <- function(df, x) {
  df %>%
    ggplot(aes(x = {{ x }})) +
    facet_wrap(~hugo_symbol, scales = "free") +
    scale_y_continuous(expand = expansion(c(0, 0.02))) +
    geom_density() +
    theme(
      axis.text = element_text(size = 7),
      axis.text.x = element_text(angle = 30, hjust = 1),
      strip.text = element_text(size = 7),
      panel.spacing = unit(1, "mm")
    ) +
    labs(x = "RNA expression")
}

plot_dusp_distribtions(dusp_rna_data, rna_expr)

dusp_rna_data %>%
  mutate(rna_expr = log10(rna_expr + 1)) %>%
  plot_dusp_distribtions(rna_expr)

dusp_rna_data %>%
  mutate(rna_expr = sqrt(rna_expr)) %>%
  plot_dusp_distribtions(rna_expr)

dusp_rna_data %>%
  group_by(hugo_symbol) %>%
  mutate(rna_expr = scale_numeric(rna_expr)) %>%
  ungroup() %>%
  plot_dusp_distribtions(rna_expr)

dusp_rna_data %>%
  mutate(rna_expr = sqrt(rna_expr)) %>%
  group_by(hugo_symbol) %>%
  mutate(rna_expr = scale_numeric(rna_expr)) %>%
  ungroup() %>%
  plot_dusp_distribtions(rna_expr)

Use \(\log_10(\text{RNA} + 1)\), centralized, and scaled values for the analysis.

dusp_rna_data %<>%
  mutate(log10_rna_expr = log10(rna_expr + 1)) %>%
  group_by(hugo_symbol) %>%
  mutate(log10_z_rna = scale_numeric(log10_rna_expr)) %>%
  ungroup()

Check for correlations in DUSP expression

dusp_corr <- dusp_rna_data %>%
  pivot_wider(id = tumor_sample_barcode, names_from = hugo_symbol, values_from = log10_z_rna) %>%
  select(-tumor_sample_barcode) %>%
  corrr::correlate() 
#> 
#> Correlation method: 'pearson'
#> Missing treated using: 'pairwise.complete.obs'
dusp_corr_pheat <- dusp_corr %>%
  as.data.frame() %>%
  column_to_rownames("rowname")
colnames(dusp_corr_pheat) <- str_remove(colnames(dusp_corr_pheat), "DUSP")
rownames(dusp_corr_pheat) <- str_remove(rownames(dusp_corr_pheat), "DUSP")
pheatmap::pheatmap(
  dusp_corr_pheat,
  border_color = NA,
  na_col = "white",
  main = "Correlation of DUSP gene expression",
)

corrr::network_plot(dusp_corr, min_cor = 0.3) +
  theme(plot.title = element_text(hjust = 0.5, face="bold")) +
  labs(title = "DUSP gene expression correlation network")

Modeling

new_allele_order <- as.character(sort(unique(dusp_rna_data$allele)))
new_allele_order <- c("WT", new_allele_order[new_allele_order != "WT"])

data <- dusp_rna_data %>%
  mutate(
    grp_allele = case_when(
      allele == "G13D" ~ "G13D",
      allele == "WT" ~ "WT",
      TRUE ~ "KRAS"
    ),
    grp_allele = factor(grp_allele, levels = c("WT", "G13D", "KRAS")),
    allele = factor(as.character(allele), levels = new_allele_order)
  )

# FOR TESTING
set.seed(0)
TESTING_DUSPS <- paste0("DUSP", 1:6)
TESTING_TSBS <- sample(unique(data$tumor_sample_barcode), 50)
data <- data %.%{
  filter(hugo_symbol %in% TESTING_DUSPS)
  filter(tumor_sample_barcode %in% TESTING_TSBS)
}
stash("m1", depends_on = "data", {
  m1 <- stan_glmer(log10_z_rna ~ (1 + allele|hugo_symbol), data = data)
})
#> Loading stashed object.
mcmc_trace(m1, pars = "(Intercept)") / mcmc_trace(m1, pars = "sigma")

mcmc_trace(m1, regex_pars = "DUSP1")

pp_check(m1, plotfun = "stat", stat = "mean")
#> `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

pp_check(m1, plotfun = "stat_2d", stat = c("mean", "sd"))

as.data.frame(m1) %.% {
  mutate(draw = row_number())
  select(draw, `(Intercept)`, tidyselect::contains("DUSP"))
  pivot_longer(
    -c(draw, `(Intercept)`), 
    names_to = "parameter", 
    values_to = "value"
  )
  mutate(
    parameter = str_remove_all(parameter, "[:punct:]"), 
    parameter = str_remove_all(parameter, "b|allele|hugosymbol|")
  )
  separate(parameter, c("allele", "dusp"), sep=" ")
  mutate(allele = str_replace(allele, "Intercept", "WT"))
} %>%
  ggplot(aes(value)) +
  facet_wrap(~ dusp, scales = "free") +
  geom_vline(xintercept = 0, lty = 2, color = "grey50", size = 0.9) +
  geom_density(aes(color = allele), size = 1) +
  scale_color_manual(values = short_allele_pal) +
  scale_x_continuous(expand = c(0, 0)) +
  scale_y_continuous(expand = expansion(c(0, 0.02))) +
  labs(x = "posterior distribution", y = "probability density", color = "KRAS allele")

bayestestR::describe_posterior(m1, effects = "all") %>%
  knitr::kable()
#> Possible multicollinearity between Sigma[hugo_symbol:alleleG13D,alleleG13D] and Sigma[hugo_symbol:alleleG12V,alleleG12V] (r = 0.83), Sigma[hugo_symbol:alleleOther,alleleOther] and Sigma[hugo_symbol:alleleG13D,alleleG13D] (r = 0.78). This might lead to inappropriate results. See 'Details' in '?rope'.
Parameter Effects Median CI CI_low CI_high pd ROPE_CI ROPE_low ROPE_high ROPE_Percentage Rhat ESS
1 (Intercept) fixed -0.1020180 89 -0.2021538 0.0118555 0.93700 89 -0.094184 0.094184 0.4538051 1.0004279 3150.258
2 b[(Intercept) hugo_symbol:DUSP1] random -0.0033220 89 -0.1133656 0.1048978 0.56425 89 -0.094184 0.094184 0.9488908 1.0004556 3498.335
8 b[alleleG12D hugo_symbol:DUSP1] random 0.0024271 89 -0.1731892 0.1992054 0.54175 89 -0.094184 0.094184 0.8096040 0.9997032 3425.864
14 b[alleleG12V hugo_symbol:DUSP1] random -0.0199189 89 -0.3288145 0.1341884 0.65575 89 -0.094184 0.094184 0.7343443 1.0004713 2097.068
20 b[alleleG13D hugo_symbol:DUSP1] random -0.0012194 89 -0.2374952 0.2270142 0.51975 89 -0.094184 0.094184 0.7458579 1.0001384 3347.772
26 b[alleleOther hugo_symbol:DUSP1] random 0.0130239 89 -0.1535039 0.2913674 0.61300 89 -0.094184 0.094184 0.7635496 1.0001203 2477.330
3 b[(Intercept) hugo_symbol:DUSP2] random 0.0002013 89 -0.1043200 0.1113167 0.50650 89 -0.094184 0.094184 0.9651783 0.9997999 3829.030
9 b[alleleG12D hugo_symbol:DUSP2] random -0.0031610 89 -0.1901994 0.1710168 0.54900 89 -0.094184 0.094184 0.8303847 1.0000014 4260.989
15 b[alleleG12V hugo_symbol:DUSP2] random -0.0002014 89 -0.2020301 0.2084184 0.50400 89 -0.094184 0.094184 0.7871384 1.0000359 5110.221
21 b[alleleG13D hugo_symbol:DUSP2] random -0.0028716 89 -0.2430732 0.1968223 0.54400 89 -0.094184 0.094184 0.7669194 1.0005367 3261.962
27 b[alleleOther hugo_symbol:DUSP2] random 0.0006820 89 -0.1964986 0.2224105 0.51325 89 -0.094184 0.094184 0.7871384 0.9994105 2788.925
4 b[(Intercept) hugo_symbol:DUSP3] random -0.0022825 89 -0.1222632 0.0978017 0.55300 89 -0.094184 0.094184 0.9590003 0.9996505 3699.982
10 b[alleleG12D hugo_symbol:DUSP3] random -0.0084162 89 -0.2328728 0.1546472 0.58675 89 -0.094184 0.094184 0.7955630 0.9997708 4554.277
16 b[alleleG12V hugo_symbol:DUSP3] random -0.0080819 89 -0.2725711 0.1661377 0.58725 89 -0.094184 0.094184 0.7674810 1.0006823 2983.357
22 b[alleleG13D hugo_symbol:DUSP3] random -0.0057184 89 -0.2569848 0.1873691 0.56575 89 -0.094184 0.094184 0.7596181 1.0004994 3171.646
28 b[alleleOther hugo_symbol:DUSP3] random 0.0126224 89 -0.1668028 0.2706089 0.61150 89 -0.094184 0.094184 0.7596181 0.9997056 2404.313
5 b[(Intercept) hugo_symbol:DUSP4] random 0.0108780 89 -0.0764832 0.1446277 0.64425 89 -0.094184 0.094184 0.8994664 0.9999886 3368.094
11 b[alleleG12D hugo_symbol:DUSP4] random 0.0089784 89 -0.1412304 0.2543169 0.60625 89 -0.094184 0.094184 0.7888234 1.0000660 2835.501
17 b[alleleG12V hugo_symbol:DUSP4] random 0.0127362 89 -0.1810369 0.3265972 0.61600 89 -0.094184 0.094184 0.7315361 0.9998390 2141.873
23 b[alleleG13D hugo_symbol:DUSP4] random 0.0284965 89 -0.1315713 0.4354323 0.69450 89 -0.094184 0.094184 0.6998034 0.9998821 1507.595
29 b[alleleOther hugo_symbol:DUSP4] random 0.0079805 89 -0.1480032 0.3246872 0.58025 89 -0.094184 0.094184 0.7652345 1.0001853 2170.777
6 b[(Intercept) hugo_symbol:DUSP5] random -0.0007927 89 -0.1125906 0.1175120 0.51750 89 -0.094184 0.094184 0.9438360 0.9997647 3669.691
12 b[alleleG12D hugo_symbol:DUSP5] random 0.0173721 89 -0.1275385 0.2696868 0.65750 89 -0.094184 0.094184 0.7652345 1.0008032 2118.533
18 b[alleleG12V hugo_symbol:DUSP5] random -0.0221086 89 -0.3443835 0.1389448 0.66500 89 -0.094184 0.094184 0.7250772 1.0002985 1826.631
24 b[alleleG13D hugo_symbol:DUSP5] random -0.0048746 89 -0.2990196 0.1901748 0.56300 89 -0.094184 0.094184 0.7520359 1.0001092 2374.914
30 b[alleleOther hugo_symbol:DUSP5] random 0.0135164 89 -0.1631829 0.2962370 0.62100 89 -0.094184 0.094184 0.7523168 1.0002454 2055.143
7 b[(Intercept) hugo_symbol:DUSP6] random -0.0075661 89 -0.1470893 0.0799647 0.60825 89 -0.094184 0.094184 0.9194047 1.0002279 2527.870
13 b[alleleG12D hugo_symbol:DUSP6] random 0.0137071 89 -0.1128226 0.2742952 0.63875 89 -0.094184 0.094184 0.7902275 1.0003357 2166.804
19 b[alleleG12V hugo_symbol:DUSP6] random 0.0082501 89 -0.1644193 0.3095453 0.57725 89 -0.094184 0.094184 0.7497894 1.0004011 2239.872
25 b[alleleG13D hugo_symbol:DUSP6] random 0.0108293 89 -0.1977732 0.2935028 0.60475 89 -0.094184 0.094184 0.7433305 1.0017371 1981.212
31 b[alleleOther hugo_symbol:DUSP6] random 0.0137211 89 -0.1572701 0.3008584 0.62725 89 -0.094184 0.094184 0.7525976 1.0001215 2191.198
32 Sigma[hugo_symbol:(Intercept),(Intercept)] random 0.0030886 89 0.0000000 0.0201564 1.00000 89 -0.094184 0.094184 1.0000000 1.0005108 1844.712
33 Sigma[hugo_symbol:alleleG12D,(Intercept)] random -0.0000105 89 -0.0125536 0.0071014 0.53275 89 -0.094184 0.094184 1.0000000 1.0003229 3431.947
35 Sigma[hugo_symbol:alleleG12V,(Intercept)] random 0.0000000 89 -0.0095464 0.0085672 0.50150 89 -0.094184 0.094184 1.0000000 1.0009940 3071.914
38 Sigma[hugo_symbol:alleleG13D,(Intercept)] random -0.0000001 89 -0.0107238 0.0114283 0.50250 89 -0.094184 0.094184 1.0000000 0.9998339 3672.967
42 Sigma[hugo_symbol:alleleOther,(Intercept)] random -0.0000133 89 -0.0134221 0.0084898 0.53900 89 -0.094184 0.094184 1.0000000 1.0006730 2743.760
34 Sigma[hugo_symbol:alleleG12D,alleleG12D] random 0.0062793 89 0.0000000 0.0622894 1.00000 89 -0.094184 0.094184 1.0000000 1.0010883 1694.146
36 Sigma[hugo_symbol:alleleG12V,alleleG12D] random 0.0000002 89 -0.0193775 0.0201184 0.50525 89 -0.094184 0.094184 1.0000000 1.0009466 3391.467
39 Sigma[hugo_symbol:alleleG13D,alleleG12D] random 0.0000226 89 -0.0185182 0.0221771 0.54650 89 -0.094184 0.094184 1.0000000 0.9992834 2639.105
43 Sigma[hugo_symbol:alleleOther,alleleG12D] random 0.0000256 89 -0.0168907 0.0205611 0.55025 89 -0.094184 0.094184 1.0000000 1.0004259 2269.516
37 Sigma[hugo_symbol:alleleG12V,alleleG12V] random 0.0082174 89 0.0000000 0.0844890 1.00000 89 -0.094184 0.094184 1.0000000 1.0014892 1479.199
40 Sigma[hugo_symbol:alleleG13D,alleleG12V] random 0.0000421 89 -0.0186403 0.0350749 0.56325 89 -0.094184 0.094184 1.0000000 1.0014704 1950.069
44 Sigma[hugo_symbol:alleleOther,alleleG12V] random -0.0000128 89 -0.0193774 0.0227035 0.53450 89 -0.094184 0.094184 1.0000000 0.9997171 3297.422
41 Sigma[hugo_symbol:alleleG13D,alleleG13D] random 0.0082641 89 0.0000000 0.0927430 1.00000 89 -0.094184 0.094184 1.0000000 1.0020634 1408.855
45 Sigma[hugo_symbol:alleleOther,alleleG13D] random 0.0000003 89 -0.0256208 0.0227178 0.50425 89 -0.094184 0.094184 1.0000000 1.0015006 3860.763
46 Sigma[hugo_symbol:alleleOther,alleleOther] random 0.0076143 89 0.0000000 0.0814343 1.00000 89 -0.094184 0.094184 1.0000000 1.0016100 1523.384